In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Local Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.util import ngrams
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
# Fetch the complete NLTK "all" collection (every corpus, grammar and model).
# NOTE(review): the import cell only pulls in stopwords, word_tokenize, FreqDist
# and ngrams, so downloading just 'stopwords' and 'punkt' may be enough —
# confirm against the rest of the notebook before narrowing this.
nltk.download('all')
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading package cess_esp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cess_esp is already up-to-date!
[nltk_data]    | Downloading package chat80 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package chat80 is already up-to-date!
[nltk_data]    | Downloading package city_database to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package city_database is already up-to-date!
[nltk_data]    | Downloading package cmudict to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package cmudict is already up-to-date!
[nltk_data]    | Downloading package comparative_sentences to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comparative_sentences is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package comtrans to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package comtrans is already up-to-date!
[nltk_data]    | Downloading package conll2000 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2000 is already up-to-date!
[nltk_data]    | Downloading package conll2002 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2002 is already up-to-date!
[nltk_data]    | Downloading package conll2007 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package conll2007 is already up-to-date!
[nltk_data]    | Downloading package crubadan to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package crubadan is already up-to-date!
[nltk_data]    | Downloading package dependency_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dependency_treebank is already up-to-date!
[nltk_data]    | Downloading package dolch to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package dolch is already up-to-date!
[nltk_data]    | Downloading package europarl_raw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package europarl_raw is already up-to-date!
[nltk_data]    | Downloading package floresta to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package floresta is already up-to-date!
[nltk_data]    | Downloading package framenet_v15 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v15 is already up-to-date!
[nltk_data]    | Downloading package framenet_v17 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package framenet_v17 is already up-to-date!
[nltk_data]    | Downloading package gazetteers to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gazetteers is already up-to-date!
[nltk_data]    | Downloading package genesis to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package genesis is already up-to-date!
[nltk_data]    | Downloading package gutenberg to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package gutenberg is already up-to-date!
[nltk_data]    | Downloading package ieer to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ieer is already up-to-date!
[nltk_data]    | Downloading package inaugural to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package inaugural is already up-to-date!
[nltk_data]    | Downloading package indian to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package indian is already up-to-date!
[nltk_data]    | Downloading package jeita to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package jeita is already up-to-date!
[nltk_data]    | Downloading package kimmo to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package kimmo is already up-to-date!
[nltk_data]    | Downloading package knbc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package knbc is already up-to-date!
[nltk_data]    | Downloading package lin_thesaurus to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package lin_thesaurus is already up-to-date!
[nltk_data]    | Downloading package mac_morpho to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mac_morpho is already up-to-date!
[nltk_data]    | Downloading package machado to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package machado is already up-to-date!
[nltk_data]    | Downloading package masc_tagged to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package masc_tagged is already up-to-date!
[nltk_data]    | Downloading package moses_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package moses_sample is already up-to-date!
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package movie_reviews is already up-to-date!
[nltk_data]    | Downloading package names to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package names is already up-to-date!
[nltk_data]    | Downloading package nombank.1.0 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nombank.1.0 is already up-to-date!
[nltk_data]    | Downloading package nps_chat to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nps_chat is already up-to-date!
[nltk_data]    | Downloading package omw to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package omw is already up-to-date!
[nltk_data]    | Downloading package opinion_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package opinion_lexicon is already up-to-date!
[nltk_data]    | Downloading package paradigms to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package paradigms is already up-to-date!
[nltk_data]    | Downloading package pil to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pil is already up-to-date!
[nltk_data]    | Downloading package pl196x to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pl196x is already up-to-date!
[nltk_data]    | Downloading package ppattach to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ppattach is already up-to-date!
[nltk_data]    | Downloading package problem_reports to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package problem_reports is already up-to-date!
[nltk_data]    | Downloading package propbank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package propbank is already up-to-date!
[nltk_data]    | Downloading package ptb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ptb is already up-to-date!
[nltk_data]    | Downloading package product_reviews_1 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_1 is already up-to-date!
[nltk_data]    | Downloading package product_reviews_2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package product_reviews_2 is already up-to-date!
[nltk_data]    | Downloading package pros_cons to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package pros_cons is already up-to-date!
[nltk_data]    | Downloading package qc to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package qc is already up-to-date!
[nltk_data]    | Downloading package reuters to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package reuters is already up-to-date!
[nltk_data]    | Downloading package rte to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rte is already up-to-date!
[nltk_data]    | Downloading package semcor to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package semcor is already up-to-date!
[nltk_data]    | Downloading package senseval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package senseval is already up-to-date!
[nltk_data]    | Downloading package sentiwordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentiwordnet is already up-to-date!
[nltk_data]    | Downloading package sentence_polarity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sentence_polarity is already up-to-date!
[nltk_data]    | Downloading package shakespeare to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package shakespeare is already up-to-date!
[nltk_data]    | Downloading package sinica_treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sinica_treebank is already up-to-date!
[nltk_data]    | Downloading package smultron to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package smultron is already up-to-date!
[nltk_data]    | Downloading package state_union to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package state_union is already up-to-date!
[nltk_data]    | Downloading package stopwords to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package stopwords is already up-to-date!
[nltk_data]    | Downloading package subjectivity to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package subjectivity is already up-to-date!
[nltk_data]    | Downloading package swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package swadesh is already up-to-date!
[nltk_data]    | Downloading package switchboard to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package switchboard is already up-to-date!
[nltk_data]    | Downloading package timit to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package timit is already up-to-date!
[nltk_data]    | Downloading package toolbox to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package toolbox is already up-to-date!
[nltk_data]    | Downloading package treebank to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package treebank is already up-to-date!
[nltk_data]    | Downloading package twitter_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package twitter_samples is already up-to-date!
[nltk_data]    | Downloading package udhr to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr is already up-to-date!
[nltk_data]    | Downloading package udhr2 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package udhr2 is already up-to-date!
[nltk_data]    | Downloading package unicode_samples to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package unicode_samples is already up-to-date!
[nltk_data]    | Downloading package universal_treebanks_v20 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_treebanks_v20 is already up-to-
[nltk_data]    |       date!
[nltk_data]    | Downloading package verbnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet is already up-to-date!
[nltk_data]    | Downloading package verbnet3 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package verbnet3 is already up-to-date!
[nltk_data]    | Downloading package webtext to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package webtext is already up-to-date!
[nltk_data]    | Downloading package wordnet to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet is already up-to-date!
[nltk_data]    | Downloading package wordnet_ic to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wordnet_ic is already up-to-date!
[nltk_data]    | Downloading package words to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package words is already up-to-date!
[nltk_data]    | Downloading package ycoe to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package ycoe is already up-to-date!
[nltk_data]    | Downloading package rslp to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package rslp is already up-to-date!
[nltk_data]    | Downloading package maxent_treebank_pos_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_treebank_pos_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package universal_tagset to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package universal_tagset is already up-to-date!
[nltk_data]    | Downloading package maxent_ne_chunker to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package maxent_ne_chunker is already up-to-date!
[nltk_data]    | Downloading package punkt to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package punkt is already up-to-date!
[nltk_data]    | Downloading package book_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package book_grammars is already up-to-date!
[nltk_data]    | Downloading package sample_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package sample_grammars is already up-to-date!
[nltk_data]    | Downloading package spanish_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package spanish_grammars is already up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading package large_grammars to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package large_grammars is already up-to-date!
[nltk_data]    | Downloading package tagsets to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package tagsets is already up-to-date!
[nltk_data]    | Downloading package snowball_data to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package snowball_data is already up-to-date!
[nltk_data]    | Downloading package bllip_wsj_no_aux to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package bllip_wsj_no_aux is already up-to-date!
[nltk_data]    | Downloading package word2vec_sample to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package word2vec_sample is already up-to-date!
[nltk_data]    | Downloading package panlex_swadesh to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package panlex_swadesh is already up-to-date!
[nltk_data]    | Downloading package mte_teip5 to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mte_teip5 is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package perluniprops to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package perluniprops is already up-to-date!
[nltk_data]    | Downloading package nonbreaking_prefixes to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package nonbreaking_prefixes is already up-to-date!
[nltk_data]    | Downloading package vader_lexicon to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package vader_lexicon is already up-to-date!
[nltk_data]    | Downloading package porter_test to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package porter_test is already up-to-date!
[nltk_data]    | Downloading package wmt15_eval to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package wmt15_eval is already up-to-date!
[nltk_data]    | Downloading package mwa_ppdb to
[nltk_data]    |     C:\Users\pawan\AppData\Roaming\nltk_data...
[nltk_data]    |   Package mwa_ppdb is already up-to-date!
[nltk_data]    | 
[nltk_data]  Done downloading collection all
Out[4]:
True
In [5]:
# Google Colab alternative (Drive mounted at /content/drive):
# path = '/content/drive/MyDrive/Files/'

# NOTE(review): absolute, machine-specific Windows directory — this must be
# changed to the local data folder before the notebook can run elsewhere.
path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
 
# `path` already ends with a separator, so plain concatenation gives the file path.
df_movies = pd.read_csv(path + 'ottmovies.csv')
 
# Preview the first rows of the raw frame.
df_movies.head()
Out[5]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Inception 2010 13+ 8.8 87% Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148.0 movie NaN 1 0 0 0 0
1 2 The Matrix 1999 16+ 8.7 88% Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136.0 movie NaN 1 0 0 0 0
2 3 Avengers: Infinity War 2018 13+ 8.4 85% Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149.0 movie NaN 1 0 0 0 0
3 4 Back to the Future 1985 7+ 8.5 96% Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116.0 movie NaN 1 0 0 0 0
4 5 The Good, the Bad and the Ugly 1966 16+ 8.8 97% Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161.0 movie NaN 1 0 1 0 0
In [6]:
# profile = ProfileReport(df_movies)
# profile
In [7]:
def data_investigate(df):
    """Print a structural overview of ``df`` and draw its missing-value map.

    The report lists, in order: row count, column count, column names,
    column dtypes, the per-column null counts (only columns that have at
    least one null), and the per-column null percentage for every column.
    It finishes with a 10x10 seaborn heatmap of ``df.isnull()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    None
        Everything is emitted through ``print`` and matplotlib.
    """
    divider = '**' * 25
    null_mask = df.isnull()
    nulls_per_column = null_mask.sum()

    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print(divider)

    print('Colums Names : \n', df.columns)
    print(divider)

    print('Datatype of Columns : \n', df.dtypes)
    print(divider)

    # Only columns that actually contain nulls are listed here.
    print('Missing Values : ')
    print(nulls_per_column[nulls_per_column > 0])
    print(divider)

    # Percentages are reported for every column, including complete ones.
    print('Missing vaules %age wise :\n')
    print(100 * (nulls_per_column / len(df.index)))
    print(divider)

    print('Pictorial Representation : ')
    plt.figure(figsize=(10, 10))
    sns.heatmap(null_mask, yticklabels=False, cbar=False)
    plt.show()
In [8]:
# Profile the frame as loaded, before any cleaning.
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                 8457
IMDb                 328
Rotten Tomatoes    10437
Directors            357
Cast                 648
Genres               234
Country              303
Language             437
Plotline            4958
Runtime              382
Seasons            16923
dtype: int64
**************************************************
Missing vaules %age wise :

ID                   0.000000
Title                0.000000
Year                 0.000000
Age                 49.973409
IMDb                 1.938191
Rotten Tomatoes     61.673462
Directors            2.109555
Cast                 3.829108
Genres               1.382734
Country              1.790463
Language             2.582284
Plotline            29.297406
Runtime              2.257283
Kind                 0.000000
Seasons            100.000000
Netflix              0.000000
Hulu                 0.000000
Prime Video          0.000000
Disney+              0.000000
Type                 0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [9]:
# Clean df_movies: every missing value is replaced with a sentinel ('NR' for
# Age, 'NA' elsewhere), the empty Seasons column is dropped and a
# 'Service Provider' label is derived. Sentinel filling turns IMDb,
# Rotten Tomatoes and Runtime into object columns.

# --- Age ---------------------------------------------------------------
# Unrated titles on Disney+ default to '13'; any other unrated title is 'NR'.
# The comparison is parenthesised because `&` binds tighter than `==`:
# without the parentheses the expression is (isnull & flag) == 1, which only
# gives the right mask because the Disney+ flag is 0/1.
missing_age_on_disney = df_movies['Age'].isnull() & (df_movies['Disney+'] == 1)
df_movies.loc[missing_age_on_disney, 'Age'] = '13'

# Normalise rating labels to bare age strings. The result is assigned back
# (instead of replace(..., inplace=True) on the column selection) so the update
# also takes effect when pandas Copy-on-Write is enabled.
df_movies['Age'] = df_movies['Age'].fillna('NR').replace(
    {'all': '0', '7+': '7', '13+': '13', '16+': '16', '18+': '18'}
)

# --- Rotten Tomatoes -----------------------------------------------------
# Strip the '%' sign from the populated scores and convert them to numbers.
# Assigning the filtered Series back aligns on the index, so rows without a
# score stay NaN until the sentinel fill below.
has_rt_score = df_movies['Rotten Tomatoes'].notnull()
df_movies['Rotten Tomatoes'] = (
    df_movies.loc[has_rt_score, 'Rotten Tomatoes']
    .str.replace('%', '', regex=False)
    .astype(int)
)

# --- Sentinel fill for the remaining columns -----------------------------
df_movies = df_movies.fillna({
    'IMDb': "NA",
    'Rotten Tomatoes': "NA",
    'Directors': "NA",
    'Cast': "NA",
    'Genres': "NA",
    'Country': "NA",
    'Language': "NA",
    'Plotline': "NA",
    'Runtime': "NA",
})

# --- Seasons ---------------------------------------------------------------
# Seasons is missing for every row of this movie dataset, so it is removed.
df_movies = df_movies.drop(columns=['Seasons'])

# --- Service Provider ------------------------------------------------------
# idxmax returns the first column (in the order listed) that holds the row
# maximum, so a title on several platforms is labelled with the earliest one.
df_movies['Service Provider'] = df_movies[['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis=1)

# --- Remove rows with remaining nulls and exact duplicates -----------------
df_movies = df_movies.dropna(how='any').drop_duplicates()
In [10]:
# Profile the frame again to confirm that cleaning left no missing values.
data_investigate(df_movies)
No of Rows :  16923
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [11]:
# Preview the first five rows of the cleaned frame.
df_movies.head()
Out[11]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Inception 2010 13 8.8 87 Christopher Nolan Leonardo DiCaprio,Joseph Gordon-Levitt,Elliot ... Action,Adventure,Sci-Fi,Thriller United States,United Kingdom English,Japanese,French Dom Cobb is a skilled thief, the absolute best... 148 movie 1 0 0 0 0 Netflix
1 2 The Matrix 1999 16 8.7 88 Lana Wachowski,Lilly Wachowski Keanu Reeves,Laurence Fishburne,Carrie-Anne Mo... Action,Sci-Fi United States English Thomas A. Anderson is a man living two lives. ... 136 movie 1 0 0 0 0 Netflix
2 3 Avengers: Infinity War 2018 13 8.4 85 Anthony Russo,Joe Russo Robert Downey Jr.,Chris Hemsworth,Mark Ruffalo... Action,Adventure,Sci-Fi United States English As the Avengers and their allies have continue... 149 movie 1 0 0 0 0 Netflix
3 4 Back to the Future 1985 7 8.5 96 Robert Zemeckis Michael J. Fox,Christopher Lloyd,Lea Thompson,... Adventure,Comedy,Sci-Fi United States English Marty McFly, a typical American teenager of th... 116 movie 1 0 0 0 0 Netflix
4 5 The Good, the Bad and the Ugly 1966 16 8.8 97 Sergio Leone Eli Wallach,Clint Eastwood,Lee Van Cleef,Aldo ... Western Italy,Spain,West Germany,United States Italian Blondie (The Good) (Clint Eastwood) is a profe... 161 movie 1 0 1 0 0 Netflix
In [12]:
# Summary statistics for the numeric columns only. IMDb, Rotten Tomatoes and
# Runtime are not included: after the "NA" sentinel fill they are object columns.
df_movies.describe()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.000000 16923.0
mean 8462.000000 2003.211901 0.214915 0.062637 0.727235 0.033150 0.0
std 4885.393638 20.526532 0.410775 0.242315 0.445394 0.179034 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 0.0
25% 4231.500000 2001.000000 0.000000 0.000000 0.000000 0.000000 0.0
50% 8462.000000 2012.000000 0.000000 0.000000 1.000000 0.000000 0.0
75% 12692.500000 2016.000000 0.000000 0.000000 1.000000 0.000000 0.0
max 16923.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 0.0
In [13]:
# Pairwise correlation of the numeric columns. The numeric columns are selected
# explicitly because df_movies also holds object columns containing "NA"
# strings; pandas >= 2.0 no longer drops those silently in corr() and raises.
df_movies.select_dtypes(include='number').corr()
Out[13]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.217816 -0.644470 -0.129926 0.469301 0.263530 NaN
Year -0.217816 1.000000 0.256151 0.101337 -0.255578 -0.047258 NaN
Netflix -0.644470 0.256151 1.000000 -0.118032 -0.745141 -0.089649 NaN
Hulu -0.129926 0.101337 -0.118032 1.000000 -0.284654 -0.039693 NaN
Prime Video 0.469301 -0.255578 -0.745141 -0.284654 1.000000 -0.289008 NaN
Disney+ 0.263530 -0.047258 -0.089649 -0.039693 -0.289008 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [14]:
# df_movies.sort_values('Year', ascending = True)
# df_movies.sort_values('IMDb', ascending = False)
In [15]:
# df_movies.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_ottmovies.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_movies = pd.read_csv(path + 'updated_ottmovies.csv')
 
# udf_movies
In [16]:
# df_netflix_movies = df_movies.loc[(df_movies['Netflix'] > 0)]
# df_hulu_movies = df_movies.loc[(df_movies['Hulu'] > 0)]
# df_prime_video_movies = df_movies.loc[(df_movies['Prime Video'] > 0)]
# df_disney_movies = df_movies.loc[(df_movies['Disney+'] > 0)]
In [17]:
# Titles exclusive to one service: that service's flag is 1 and the other three are 0.
on_netflix, off_netflix = df_movies['Netflix'] == 1, df_movies['Netflix'] == 0
on_hulu, off_hulu = df_movies['Hulu'] == 1, df_movies['Hulu'] == 0
on_prime, off_prime = df_movies['Prime Video'] == 1, df_movies['Prime Video'] == 0
on_disney, off_disney = df_movies['Disney+'] == 1, df_movies['Disney+'] == 0

df_netflix_only_movies = df_movies[on_netflix & off_hulu & off_prime & off_disney]
df_hulu_only_movies = df_movies[off_netflix & on_hulu & off_prime & off_disney]
df_prime_video_only_movies = df_movies[off_netflix & off_hulu & on_prime & off_disney]
df_disney_only_movies = df_movies[off_netflix & off_hulu & off_prime & on_disney]
In [18]:
df_movies_years = df_movies.copy()
In [19]:
# Discard rows whose Year holds the "NA" placeholder, then store Year as an integer.
year_is_placeholder = df_movies_years['Year'] == "NA"
df_movies_years = df_movies_years.loc[~year_is_placeholder].copy()
df_movies_years['Year'] = df_movies_years['Year'].astype(int)
In [20]:
# One frame per streaming service, keeping only the titles flagged as available there
# (a title flagged on several services appears in each of their frames).
netflix_years_movies = df_movies_years[df_movies_years['Netflix'].eq(1)]
hulu_years_movies = df_movies_years[df_movies_years['Hulu'].eq(1)]
prime_video_years_movies = df_movies_years[df_movies_years['Prime Video'].eq(1)]
disney_years_movies = df_movies_years[df_movies_years['Disney+'].eq(1)]
In [21]:
df_movies_years_group = df_movies_years.copy()
In [22]:
# Correlation heat map of the numeric columns.
# Only numeric/boolean columns are correlated: text columns cannot be, and they make
# DataFrame.corr() raise on pandas >= 2.0.
corr = df_movies_years.select_dtypes(include = ['number', 'bool']).corr()

# Create exactly one figure so that no empty canvas is emitted alongside the plot.
fig, ax = plt.subplots(figsize = (10, 8))

# Draw on the explicit axes. seaborn already labels every row/column with the column
# name, centred on its cell, so the ticks are not repositioned by hand (integer tick
# positions would sit on the cell edges instead of the cell centres).
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f", ax = ax)
ax.set_title('Correlation between numeric columns')

plt.show()
<Figure size 720x720 with 0 Axes>
In [23]:
# Rank every title from the newest to the oldest release year, renumbering rows from 0.
df_years_high_movies = df_movies_years.sort_values(by = 'Year', ascending = False).reset_index(drop = True)

print('\nMovies with Highest Ever Year  are : \n')
df_years_high_movies.head(5)
Movies with Highest Ever Year  are : 

Out[23]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 761 The Occupant 2020 18 6.4 50 David Pastor,Àlex Pastor Javier Gutiérrez,Mario Casas,Bruna Cusí,Ruth D... Adventure,Drama,Thriller Spain Spanish NA 103 movie 1 0 0 0 0 Netflix
1 706 Go! 2020 16 7.2 88 Doug Liman Katie Holmes,Sarah Polley,Suzanne Krull,Desmon... Comedy,Crime United States English Told from three perspectives, a story of a bun... 102 movie 1 0 0 0 0 Netflix
2 682 Horse Girl 2020 16 5.9 70 Jeff Baena Alison Brie,Molly Shannon,Goldenite,Stella Che... Drama,Mystery,Thriller United States English Sarah, a socially isolated arts and crafts sto... 103 movie 1 0 0 0 0 Netflix
3 2325 The Last Thing He Wanted 2020 16 4.3 5 Dee Rees Anne Hathaway,Ben Affleck,Rosie Perez,Willem D... Crime,Drama,Thriller United States English,Spanish,French NA 115 movie 1 0 0 0 0 Netflix
4 6544 Shubh Mangal Zyada Saavdhan 2020 NR 5.8 92 Hitesh Kewalya Ayushmann Khurrana,Jitendra Kumar,Gajraj Rao,N... Comedy,Romance India Hindi An eccentric marketing guru visits a Coca-Cola... 117 movie 0 0 1 0 0 Prime Video
In [24]:
# Bar chart of the first 15 titles in the newest-first ranking, all platforms.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = df_years_high_movies['Title'][:15],
             x = df_years_high_movies['Year'][:15], 
             color = df_years_high_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Highest Year : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [25]:
# Rank every title from the oldest to the newest release year, renumbering rows from 0.
df_years_low_movies = df_movies_years.sort_values(by = 'Year', ascending = True).reset_index(drop = True)

print('\nMovies with Lowest Ever Year  are : \n')
df_years_low_movies.head(5)
Movies with Lowest Ever Year  are : 

Out[25]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16756 Space: The New Frontier 1901 16 7 NA Dave Bullock David Boreanaz,Miguel Ferrer,Neil Patrick Harr... Animation,Action,Adventure,Fantasy,Sci-Fi United States English The American Muscle Car series relives that in... 75 movie 0 0 1 0 0 Prime Video
1 4343 A Trip to the Moon 1902 0 8.2 44 Georges Méliès Victor André,Bleuette Bernon,Brunnet,Jehanne d... Short,Adventure,Comedy,Fantasy,Sci-Fi France None,French An association of astronomers has convened to ... 13 movie 0 0 1 0 0 Prime Video
2 8557 From the Manger to the Cross 1912 7 5.7 NA Sidney Olcott R. Henderson Bland,Percy Dyer,Gene Gauntier,Al... Biography,Drama United States None,English NA 60 movie 0 0 1 0 0 Prime Video
3 9636 Fatty Joins the Force 1913 NR 5.3 NA George Nichols Roscoe 'Fatty' Arbuckle,Charles Avery,Lou Bres... Comedy,Short United States None,English 7 years after the original Fortress movie, Bre... 12 movie 0 0 1 0 0 Prime Video
4 9884 The Speed Kings 1913 NR 5 NA Wilfred Lucas Ford Sterling,Mabel Normand,Teddy Tetzlaff,Ear... Short,Action,Comedy United States None,English In Fort Hernandez, San Antonio, a group of Mex... 8 movie 0 0 1 0 0 Prime Video
In [26]:
# Bar chart of the first 15 titles in the oldest-first ranking, all platforms.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = df_years_low_movies['Title'][:15],
             x = df_years_low_movies['Year'][:15], 
             color = df_years_low_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Lowest Year : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [27]:
# Text summary of the release years: the number of distinct years, the distinct years
# listed newest first, and the first title of the newest-first / oldest-first rankings
# together with the overall maximum / minimum year.
print(f'''
      Total '{df_movies_years['Year'].unique().shape[0]}' unique Year s were Given, They were Like this,\n
      
{df_movies_years.sort_values(by = 'Year', ascending = False)['Year'].unique()}\n
 
      The Highest Ever Year Ever Any Movie Got is '{df_years_high_movies['Title'][0]}' : '{df_years_high_movies['Year'].max()}'\n
 
      The Lowest Ever Year Ever Any Movie Got is '{df_years_low_movies['Title'][0]}' : '{df_years_low_movies['Year'].min()}'\n
      ''')
      Total '110' unique Year s were Given, They were Like this,

      
[2020 2019 2018 2017 2016 2015 2014 2013 2012 2011 2010 2009 2008 2007
 2006 2005 2004 2003 2002 2001 2000 1999 1998 1997 1996 1995 1994 1993
 1992 1991 1990 1989 1988 1987 1986 1985 1984 1983 1982 1981 1980 1979
 1978 1977 1976 1975 1974 1973 1972 1971 1970 1969 1968 1967 1966 1965
 1964 1963 1962 1961 1960 1959 1958 1957 1956 1955 1954 1953 1952 1951
 1950 1949 1948 1947 1946 1945 1944 1943 1942 1941 1940 1939 1938 1937
 1936 1935 1934 1933 1932 1931 1930 1929 1928 1927 1926 1925 1924 1923
 1922 1921 1920 1919 1918 1917 1916 1915 1913 1912 1902 1901]

 
      The Highest Ever Year Ever Any Movie Got is 'The Occupant' : '2020'

 
      The Lowest Ever Year Ever Any Movie Got is 'Space: The New Frontier' : '1901'

      
In [28]:
# Netflix titles only, taken from the newest-first and the oldest-first rankings and
# renumbered from 0.
netflix_years_high_movies = df_years_high_movies[df_years_high_movies['Netflix'] == 1].reset_index(drop = True)
netflix_years_low_movies = df_years_low_movies[df_years_low_movies['Netflix'] == 1].reset_index(drop = True)

netflix_years_high_movies.head(5)
Out[28]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 761 The Occupant 2020 18 6.4 50 David Pastor,Àlex Pastor Javier Gutiérrez,Mario Casas,Bruna Cusí,Ruth D... Adventure,Drama,Thriller Spain Spanish NA 103 movie 1 0 0 0 0 Netflix
1 706 Go! 2020 16 7.2 88 Doug Liman Katie Holmes,Sarah Polley,Suzanne Krull,Desmon... Comedy,Crime United States English Told from three perspectives, a story of a bun... 102 movie 1 0 0 0 0 Netflix
2 682 Horse Girl 2020 16 5.9 70 Jeff Baena Alison Brie,Molly Shannon,Goldenite,Stella Che... Drama,Mystery,Thriller United States English Sarah, a socially isolated arts and crafts sto... 103 movie 1 0 0 0 0 Netflix
3 2325 The Last Thing He Wanted 2020 16 4.3 5 Dee Rees Anne Hathaway,Ben Affleck,Rosie Perez,Willem D... Crime,Drama,Thriller United States English,Spanish,French NA 115 movie 1 0 0 0 0 Netflix
4 16245 Dracula 2020 16 7.4 74 Francis Ford Coppola Gary Oldman,Winona Ryder,Anthony Hopkins,Keanu... Horror United Kingdom,United States English,Romanian,Greek,Bulgarian,Latin In the wake of the Fronde in 1667, the French ... 128 movie 1 0 0 0 0 Netflix
In [29]:
# Bar chart of the first 15 Netflix titles in the newest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = netflix_years_high_movies['Title'][:15],
             x = netflix_years_high_movies['Year'][:15], 
             color = netflix_years_high_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Highest Year : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
# Bar chart of the first 15 Netflix titles in the oldest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = netflix_years_low_movies['Title'][:15],
             x = netflix_years_low_movies['Year'][:15], 
             color = netflix_years_low_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Lowest Year : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [31]:
# Hulu titles only, taken from the newest-first and the oldest-first rankings and
# renumbered from 0.
hulu_years_high_movies = df_years_high_movies[df_years_high_movies['Hulu'] == 1].reset_index(drop = True)
hulu_years_low_movies = df_years_low_movies[df_years_low_movies['Hulu'] == 1].reset_index(drop = True)

hulu_years_high_movies.head(5)
Out[31]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 3589 Big Time Adolescence 2020 16 7 86 Jason Orley Griffin Gluck,Larry John Meyers,Michael Devine... Comedy,Drama United States English NA 91 movie 0 1 0 0 0 Hulu
1 16550 The Happy Days of Garry Marshall 2020 NR 8.3 NA John Scheinfeld Abigail Breslin,Anne Hathaway,Jennifer Garner,... Documentary United States English NA 84 movie 0 1 0 0 0 Hulu
2 16570 Drag Me 2020 16 6.5 NA Sam Raimi Alison Lohman,Justin Long,Lorna Raver,Dileep R... Horror United States English,Spanish,Hungarian,Czech NA 99 movie 0 1 0 0 0 Hulu
3 3927 Spaceship Earth 2020 NR 6.4 89 Matt Wolf Shelley Taylor Morgan,Kathelin Gray,Marie Hard... Documentary United States English The true, stranger-than-fiction, adventure of ... 113 movie 0 1 0 0 0 Hulu
4 4308 Alien Contact 2020 NR 7 NA Rico Lowry Charles Washington Documentary United States English Actual UFO encounters between Alien spacecraft... 75 movie 0 1 1 0 0 Prime Video
In [32]:
# Bar chart of the first 15 Hulu titles in the newest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = hulu_years_high_movies['Title'][:15],
             x = hulu_years_high_movies['Year'][:15], 
             color = hulu_years_high_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Highest Year : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
# Bar chart of the first 15 Hulu titles in the oldest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = hulu_years_low_movies['Title'][:15],
             x = hulu_years_low_movies['Year'][:15], 
             color = hulu_years_low_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Lowest Year : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [34]:
# Prime Video titles only, taken from the newest-first and the oldest-first rankings and
# renumbered from 0.
prime_video_years_high_movies = df_years_high_movies[df_years_high_movies['Prime Video'] == 1].reset_index(drop = True)
prime_video_years_low_movies = df_years_low_movies[df_years_low_movies['Prime Video'] == 1].reset_index(drop = True)

prime_video_years_high_movies.head(5)
Out[34]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 6544 Shubh Mangal Zyada Saavdhan 2020 NR 5.8 92 Hitesh Kewalya Ayushmann Khurrana,Jitendra Kumar,Gajraj Rao,N... Comedy,Romance India Hindi An eccentric marketing guru visits a Coca-Cola... 117 movie 0 0 1 0 0 Prime Video
1 16641 Russell Peters: Deported 2020 NR 6.3 NA David Higby Jason Collings,Vicky Kaushal,Taapsee Pannu,Rus... Comedy United States English NA 67 movie 0 0 1 0 0 Prime Video
2 6606 Happiness Continues 2020 NR 7.4 NA Anthony Mandler Joe Jonas,Kevin Jonas,Nick Jonas,Priyanka Chop... Documentary,Music NA NA Blinded since childhood when a hideous car-cra... 104 movie 0 0 1 0 0 Prime Video
3 9400 Killer Camera Monsters 2020 NR 2.6 NA Ryan McBay Sarati,Lauren Compton,Steve Filice,Bernadette ... Horror,Thriller United States English In Seattle, the successful forensic psychiatri... 86 movie 0 0 1 0 0 Prime Video
4 16694 Jayde Adams: Serious Black Jumper 2020 NR 7.1 NA Peter Orton Jayde Adams Comedy United States English NA 67 movie 0 0 1 0 0 Prime Video
In [35]:
# Bar chart of the first 15 Prime Video titles in the newest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = prime_video_years_high_movies['Title'][:15],
             x = prime_video_years_high_movies['Year'][:15], 
             color = prime_video_years_high_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Highest Year : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
# Bar chart of the first 15 Prime Video titles in the oldest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = prime_video_years_low_movies['Title'][:15],
             x = prime_video_years_low_movies['Year'][:15], 
             color = prime_video_years_low_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Lowest Year : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [37]:
# Disney+ titles only, taken from the newest-first and the oldest-first rankings and
# renumbered from 0.
disney_years_high_movies = df_years_high_movies[df_years_high_movies['Disney+'] == 1].reset_index(drop = True)
disney_years_low_movies = df_years_low_movies[df_years_low_movies['Disney+'] == 1].reset_index(drop = True)

disney_years_high_movies.head(5)
Out[37]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Netflix Hulu Prime Video Disney+ Type Service Provider
0 16142 A Celebration of the Music from Coco 2020 13 7.1 NA Ron de Moraes Benjamin Bratt,Jaime Camil,Aran de la Peña,Fel... Music,Musical United States English After six months of scientifically advanced tr... 47 movie 0 0 0 1 0 Disney+
1 16080 Lamp Life 2020 13 6.7 NA Valerie LaPointe Annie Potts,Ally Maki,Jim Hanks,Emily Davis,Mi... Animation,Short,Adventure,Comedy,Family,Fantasy United States English When Penny and her family are invited on a cru... 7 movie 0 0 0 1 0 Disney+
2 15767 Onward 2020 7 7.4 88 Dan Scanlon Tom Holland,Chris Pratt,Julia Louis-Dreyfus,Oc... Animation,Adventure,Comedy,Family,Fantasy United States English The historical film by the American director E... 102 movie 0 0 0 1 0 Disney+
3 16061 The Disney Family Singalong 2020 0 7.8 NA Hamish Hamilton,James B. Merryman Christina Aguilera,Erin Andrews,Joshua Bassett... Family,Musical United States English After committing check fraud, Preston Waters l... NA movie 0 0 0 1 0 Disney+
4 16171 Penguins: Life on the Edge 2020 0 6.9 NA Alastair Fothergill,Jeff Wilson Blair Underwood,Matthew Aeberhard,John Aitchis... Documentary,Family United States English,French In this film, edited from eight episodes of Di... 78 movie 0 0 0 1 0 Disney+
In [38]:
# Bar chart of the first 15 Disney+ titles in the newest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = disney_years_high_movies['Title'][:15],
             x = disney_years_high_movies['Year'][:15], 
             color = disney_years_high_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Highest Year : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
# Bar chart of the first 15 Disney+ titles in the oldest-first ranking.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = disney_years_low_movies['Title'][:15],
             x = disney_years_low_movies['Year'][:15], 
             color = disney_years_low_movies['Year'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies', 'x' : 'Release Year'},
             title  = 'Movies with Lowest Year : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [40]:
# Text summary of the newest and oldest release year overall and per platform.
# Each line pairs the first title of the relevant ranking (row label 0) with that
# ranking's maximum / minimum Year; when several titles share the extreme year, only
# the first-ranked one is named.
print(f'''
      The Movie with Highest Year  Ever Got is '{df_years_high_movies['Title'][0]}' : '{df_years_high_movies['Year'].max()}'\n
      The Movie with Lowest Year  Ever Got is '{df_years_low_movies['Title'][0]}' : '{df_years_low_movies['Year'].min()}'\n
      
      The Movie with Highest Year  on 'Netflix' is '{netflix_years_high_movies['Title'][0]}' : '{netflix_years_high_movies['Year'].max()}'\n
      The Movie with Lowest Year  on 'Netflix' is '{netflix_years_low_movies['Title'][0]}' : '{netflix_years_low_movies['Year'].min()}'\n
      
      The Movie with Highest Year  on 'Hulu' is '{hulu_years_high_movies['Title'][0]}' : '{hulu_years_high_movies['Year'].max()}'\n
      The Movie with Lowest Year  on 'Hulu' is '{hulu_years_low_movies['Title'][0]}' : '{hulu_years_low_movies['Year'].min()}'\n
      
      The Movie with Highest Year  on 'Prime Video' is '{prime_video_years_high_movies['Title'][0]}' : '{prime_video_years_high_movies['Year'].max()}'\n
      The Movie with Lowest Year  on 'Prime Video' is '{prime_video_years_low_movies['Title'][0]}' : '{prime_video_years_low_movies['Year'].min()}'\n
      
      The Movie with Highest Year  on 'Disney+' is '{disney_years_high_movies['Title'][0]}' : '{disney_years_high_movies['Year'].max()}'\n
      The Movie with Lowest Year  on 'Disney+' is '{disney_years_low_movies['Title'][0]}' : '{disney_years_low_movies['Year'].min()}'\n 
      ''')
      The Movie with Highest Year  Ever Got is 'The Occupant' : '2020'

      The Movie with Lowest Year  Ever Got is 'Space: The New Frontier' : '1901'

      
      The Movie with Highest Year  on 'Netflix' is 'The Occupant' : '2020'

      The Movie with Lowest Year  on 'Netflix' is 'The Battle of Midway' : '1942'

      
      The Movie with Highest Year  on 'Hulu' is 'Big Time Adolescence' : '2020'

      The Movie with Lowest Year  on 'Hulu' is 'The Hunchback of Notre Dame' : '1923'

      
      The Movie with Highest Year  on 'Prime Video' is 'Shubh Mangal Zyada Saavdhan' : '2020'

      The Movie with Lowest Year  on 'Prime Video' is 'Space: The New Frontier' : '1901'

      
      The Movie with Highest Year  on 'Disney+' is 'A Celebration of the Music from Coco' : '2020'

      The Movie with Lowest Year  on 'Disney+' is 'The Three Musketeers' : '1921'
 
      
In [41]:
# Mean release year overall and per platform, rounded to two decimals.
# (Spelling of "Across" corrected in the printed text.)
print(f'''
      Across All Platforms the Average Year  is '{round(df_movies_years['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Netflix' is '{round(netflix_years_movies['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Hulu' is '{round(hulu_years_movies['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Prime Video' is '{round(prime_video_years_movies['Year'].mean(), ndigits = 2)}'\n
      The Average Year  on 'Disney+' is '{round(disney_years_movies['Year'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Year  is '2003.21'

      The Average Year  on 'Netflix' is '2013.26'

      The Average Year  on 'Hulu' is '2011.26'

      The Average Year  on 'Prime Video' is '2000.0'

      The Average Year  on 'Disney+' is '1997.97'
 
      
In [42]:
# Distribution of release years: histogram with a KDE overlay (left), box plot (right).
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
# histplot replaces the deprecated distplot; stat = 'density' keeps the density-scaled
# y axis that distplot drew by default.
sns.histplot(df_movies_years['Year'], bins = 20, kde = True, stat = 'density', ax = ax[0])
# Pass the series as `x` by keyword: a bare positional argument is interpreted
# differently by different seaborn releases.
sns.boxplot(x = df_movies_years['Year'], ax = ax[1])
plt.show()
In [43]:
# Canvas and heading for the overlaid histograms
plt.figure(figsize = (20, 5))
plt.title('Year s Per Platform')

# (frame, colour) pairs in drawing order; only the first 100 rows of each frame are drawn
platform_layers = [
    (prime_video_years_movies, 'lightblue'),
    (netflix_years_movies, 'red'),
    (hulu_years_movies, 'lightgreen'),
    (disney_years_movies, 'darkblue'),
]
for platform_frame, layer_colour in platform_layers:
    sns.histplot(platform_frame['Year'][:100], color = layer_colour, legend = True, kde = True)

# Legend entries are listed in the same order as the layers above
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [44]:
# Per release year: the count of non-null titles plus the sum of each platform flag,
# ordered from the year with the most titles to the year with the fewest.
titles_by_year = df_movies_years.groupby('Year')
year_count = titles_by_year['Title'].count()
year_movies = titles_by_year[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
year_data_movies = (
    pd.concat([year_count, year_movies], axis = 1)
    .reset_index()
    .rename(columns = {'Title' : 'Movies Count'})
    .sort_values(by = 'Movies Count', ascending = False)
)
In [45]:
# Movies Count per Year - All Platforms Combined
year_data_movies.head()
Out[45]:
Year Movies Count Netflix Hulu Prime Video Disney+
106 2017 1449 576 140 792 22
107 2018 1287 561 168 618 15
105 2016 1236 451 81 735 17
104 2015 1090 281 78 761 10
103 2014 991 174 60 774 10
In [46]:
# Bar chart of the number of titles released in each year, all platforms.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = year_data_movies['Movies Count'],
             x = year_data_movies['Year'], 
             color = year_data_movies['Year'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Release Year'},
             title  = 'Movies with Year : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [47]:
# Pie chart of the first ten rows of year_data_movies (sorted by 'Movies Count',
# descending, where it is built), i.e. the ten release years with the most titles.
# Slices are named by year and sized by that year's title count.
fig = px.pie(year_data_movies[:10],
             names = year_data_movies['Year'][:10],
             values = year_data_movies['Movies Count'][:10],
             color = year_data_movies['Movies Count'][:10],
             color_discrete_sequence = px.colors.sequential.Teal)

# Show both the percentage and the year on each slice, and set the trace title.
fig.update_traces(textinfo = 'percent+label',
                  title = 'Movies Count based on Year Group')
fig.show()
In [48]:
# Release years ranked by total title count, highest first, renumbered from 0
# (all platforms combined).
df_year_high_movies = year_data_movies.sort_values(by = 'Movies Count', ascending = False).reset_index(drop = True)

print('\nYear with Highest Ever Movies Count are : All Platforms Combined\n')
df_year_high_movies.head(5)
Year with Highest Ever Movies Count are : All Platforms Combined

Out[48]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 2017 1449 576 140 792 22
1 2018 1287 561 168 618 15
2 2016 1236 451 81 735 17
3 2015 1090 281 78 761 10
4 2014 991 174 60 774 10
In [49]:
# Bar chart of the ten release years with the most titles, all platforms.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = df_year_high_movies['Movies Count'][:10],
             x = df_year_high_movies['Year'][:10], 
             color = df_year_high_movies['Movies Count'][:10],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Release Year'},
             title  = 'Year with Highest Movies Count : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [50]:
# Release years ranked by total title count, lowest first, renumbered from 0
# (all platforms combined).
df_year_low_movies = year_data_movies.sort_values(by = 'Movies Count', ascending = True).reset_index(drop = True)

print('\nYear with Lowest Ever Movies Count are : All Platforms Combined\n')
df_year_low_movies.head(5)
Year with Lowest Ever Movies Count are : All Platforms Combined

Out[50]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 1901 1 0 0 1 0
1 1912 1 0 0 1 0
2 1902 1 0 0 1 0
3 1927 1 0 0 1 0
4 1916 1 0 0 1 0
In [51]:
# Bar chart of the ten release years with the fewest titles, all platforms.
# The x axis carries the release year, so it is labelled as a year rather than as a
# duration in minutes.
fig = px.bar(y = df_year_low_movies['Movies Count'][:10],
             x = df_year_low_movies['Year'][:10], 
             color = df_year_low_movies['Movies Count'][:10],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Release Year'},
             title  = 'Year with Lowest Movies Count : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [52]:
# Text summary of titles per release year: the number of rows with a Year, the number
# of distinct years, the five years with the most titles, and the years holding the
# highest and lowest title counts (first row of each count ranking).
print(f'''
      Total '{df_movies_years['Year'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{year_data_movies['Year'].unique().shape[0]}' Year, They were Like this, \n
 
      {year_data_movies.sort_values(by = 'Movies Count', ascending = False)['Year'].head(5).unique()} etc. \n
 
      The Year with Highest Movies Count have '{year_data_movies['Movies Count'].max()}' Movies Available is '{df_year_high_movies['Year'][0]}', &\n
      The Year with Lowest Movies Count have '{year_data_movies['Movies Count'].min()}' Movies Available is '{df_year_low_movies['Year'][0]}'
      ''')
      Total '16923' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '110' Year, They were Like this, 

 
      [2017 2018 2016 2015 2014] etc. 

 
      The Year with Highest Movies Count have '1449' Movies Available is '2017', &

      The Year with Lowest Movies Count have '1' Movies Available is '1901'
      
In [53]:
# Movies Count per Year - Netflix
# Years in which Netflix has at least one title, busiest first; only Year and Netflix kept.
netflix_year_movies = (
    year_data_movies[year_data_movies['Netflix'] !=  0]
    .sort_values(by = 'Netflix', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)

# Every year ranked by its Netflix title count, highest first and lowest first.
# Years with zero Netflix titles are part of both rankings.
netflix_year_high_movies = df_year_high_movies.sort_values(by = 'Netflix', ascending = False).reset_index(drop = True)
netflix_year_low_movies = df_year_high_movies.sort_values(by = 'Netflix', ascending = True).reset_index(drop = True)

netflix_year_high_movies.head(5)
Out[53]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 2017 1449 576 140 792 22
1 2018 1287 561 168 618 15
2 2016 1236 451 81 735 17
3 2019 703 430 119 164 24
4 2015 1090 281 78 761 10
In [54]:
# Movies Count per Year - Hulu
# Years in which Hulu has at least one title, busiest first; only Year and Hulu kept.
hulu_year_movies = (
    year_data_movies[year_data_movies['Hulu'] !=  0]
    .sort_values(by = 'Hulu', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)

# Every year ranked by its Hulu title count, highest first and lowest first.
# Years with zero Hulu titles are part of both rankings.
hulu_year_high_movies = df_year_high_movies.sort_values(by = 'Hulu', ascending = False).reset_index(drop = True)
hulu_year_low_movies = df_year_high_movies.sort_values(by = 'Hulu', ascending = True).reset_index(drop = True)

hulu_year_high_movies.head(5)
Out[54]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 2018 1287 561 168 618 15
1 2017 1449 576 140 792 22
2 2019 703 430 119 164 24
3 2016 1236 451 81 735 17
4 2015 1090 281 78 761 10
In [55]:
# Movies Count per Year - Prime Video
# Years in which Prime Video has at least one title, busiest first; only Year and Prime Video kept.
prime_video_year_movies = (
    year_data_movies[year_data_movies['Prime Video'] !=  0]
    .sort_values(by = 'Prime Video', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)

# Every year ranked by its Prime Video title count, highest first and lowest first.
# Years with zero Prime Video titles are part of both rankings.
prime_video_year_high_movies = df_year_high_movies.sort_values(by = 'Prime Video', ascending = False).reset_index(drop = True)
prime_video_year_low_movies = df_year_high_movies.sort_values(by = 'Prime Video', ascending = True).reset_index(drop = True)

prime_video_year_high_movies.head(5)
Out[55]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 2013 979 139 51 815 12
1 2017 1449 576 140 792 22
2 2014 991 174 60 774 10
3 2015 1090 281 78 761 10
4 2016 1236 451 81 735 17
In [56]:
# Movies Count per Year - Disney+
# Years in which Disney+ has at least one title, busiest first; only Year and Disney+ kept.
disney_year_movies = (
    year_data_movies[year_data_movies['Disney+'] !=  0]
    .sort_values(by = 'Disney+', ascending = False)
    .reset_index(drop = True)
    .drop(columns = ['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)

# Every year ranked by its Disney+ title count, highest first and lowest first.
# Years with zero Disney+ titles are part of both rankings.
disney_year_high_movies = df_year_high_movies.sort_values(by = 'Disney+', ascending = False).reset_index(drop = True)
disney_year_low_movies = df_year_high_movies.sort_values(by = 'Disney+', ascending = True).reset_index(drop = True)

disney_year_high_movies.head(5)
Out[56]:
Year Movies Count Netflix Hulu Prime Video Disney+
0 2003 214 31 6 157 25
1 2019 703 430 119 164 24
2 2017 1449 576 140 792 22
3 2000 176 19 8 132 21
4 2002 196 21 9 152 21
In [57]:
# Text summary of the release years with the highest and lowest title counts, overall
# and per platform. Each line pairs the Year in the first row of a ranking with the
# max / min of the ranked column.
# NOTE(review): the per-platform *_year_low_movies rankings are built from every year,
# including years where the platform has no titles, so the reported per-platform
# minimum can be 0 -- confirm whether zero-count years should be excluded.
print(f'''
      The Year with Highest Movies Count Ever Got is '{df_year_high_movies['Year'][0]}' : '{df_year_high_movies['Movies Count'].max()}'\n
      The Year with Lowest Movies Count Ever Got is '{df_year_low_movies['Year'][0]}' : '{df_year_low_movies['Movies Count'].min()}'\n
      
      The Year with Highest Movies Count on 'Netflix' is '{netflix_year_high_movies['Year'][0]}' : '{netflix_year_high_movies['Netflix'].max()}'\n
      The Year with Lowest Movies Count on 'Netflix' is '{netflix_year_low_movies['Year'][0]}' : '{netflix_year_low_movies['Netflix'].min()}'\n
      
      The Year with Highest Movies Count on 'Hulu' is '{hulu_year_high_movies['Year'][0]}' : '{hulu_year_high_movies['Hulu'].max()}'\n
      The Year with Lowest Movies Count on 'Hulu' is '{hulu_year_low_movies['Year'][0]}' : '{hulu_year_low_movies['Hulu'].min()}'\n
      
      The Year with Highest Movies Count on 'Prime Video' is '{prime_video_year_high_movies['Year'][0]}' : '{prime_video_year_high_movies['Prime Video'].max()}'\n
      The Year with Lowest Movies Count on 'Prime Video' is '{prime_video_year_low_movies['Year'][0]}' : '{prime_video_year_low_movies['Prime Video'].min()}'\n
      
      The Year with Highest Movies Count on 'Disney+' is '{disney_year_high_movies['Year'][0]}' : '{disney_year_high_movies['Disney+'].max()}'\n
      The Year with Lowest Movies Count on 'Disney+' is '{disney_year_low_movies['Year'][0]}' : '{disney_year_low_movies['Disney+'].min()}'\n 
      ''')
      The Year with Highest Movies Count Ever Got is '2017' : '1449'

      The Year with Lowest Movies Count Ever Got is '1901' : '1'

      
      The Year with Highest Movies Count on 'Netflix' is '2017' : '576'

      The Year with Lowest Movies Count on 'Netflix' is '1941' : '0'

      
      The Year with Highest Movies Count on 'Hulu' is '2018' : '168'

      The Year with Lowest Movies Count on 'Hulu' is '1941' : '0'

      
      The Year with Highest Movies Count on 'Prime Video' is '2013' : '815'

      The Year with Lowest Movies Count on 'Prime Video' is '1901' : '1'

      
      The Year with Highest Movies Count on 'Disney+' is '2003' : '25'

      The Year with Lowest Movies Count on 'Disney+' is '1901' : '0'
 
      
In [58]:
# Mean number of titles per release year, overall and per platform, rounded to two
# decimals. The per-platform means use the frames that keep only years in which the
# platform has at least one title.
# (Spelling of "Across" corrected in the printed text.)
print(f'''
      Across All Platforms the Average Movies Count of Year is '{round(year_data_movies['Movies Count'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Year on 'Netflix' is '{round(netflix_year_movies['Netflix'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Year on 'Hulu' is '{round(hulu_year_movies['Hulu'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Year on 'Prime Video' is '{round(prime_video_year_movies['Prime Video'].mean(), ndigits = 2)}'\n
      The Average Movies Count of Year on 'Disney+' is '{round(disney_year_movies['Disney+'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Movies Count of Year is '153.85'

      The Average Movies Count of Year on 'Netflix' is '58.66'

      The Average Movies Count of Year on 'Hulu' is '16.83'

      The Average Movies Count of Year on 'Prime Video' is '111.88'

      The Average Movies Count of Year on 'Disney+' is '6.76'
 
      
In [59]:
# Number of distinct release years overall and per platform (per-platform frames keep
# only years in which the platform has at least one title).
# (Spelling of "Across" corrected in the printed text.)
print(f'''
      Across All Platforms Total Count of Year is '{year_data_movies['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Netflix' is '{netflix_year_movies['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Hulu' is '{hulu_year_movies['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Prime Video' is '{prime_video_year_movies['Year'].unique().shape[0]}'\n
      Total Count of Year on 'Disney+' is '{disney_year_movies['Year'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Year is '110'

      Total Count of Year on 'Netflix' is '62'

      Total Count of Year on 'Hulu' is '63'

      Total Count of Year on 'Prime Video' is '110'

      Total Count of Year on 'Disney+' is '83'
 
      
In [60]:
# Line plot of the total title count against release year.
fig = plt.figure(figsize = (20, 10))
sns.lineplot(x = year_data_movies['Year'], y = year_data_movies['Movies Count'])
plt.show()
In [61]:
# Title count per release year, one line per platform on shared axes.
plt.figure(figsize = (20, 10))

# Each line carries its platform name as a label so the legend can tell the colours apart.
platform_line_colours = [
    ('Netflix', 'red'),
    ('Hulu', 'lightgreen'),
    ('Prime Video', 'lightblue'),
    ('Disney+', 'darkblue'),
]
for platform_name, line_colour in platform_line_colours:
    sns.lineplot(x = year_data_movies['Year'], y = year_data_movies[platform_name],
                 color = line_colour, label = platform_name)

plt.xlabel('Release Year', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.legend()
plt.show()
In [62]:
# 2 x 2 grid: title count per release year, one panel per platform.
fig, axes = plt.subplots(2, 2,figsize=(20 ,20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
panel_colours = ['red', 'lightgreen', 'lightblue', 'darkblue']

# axes.flat walks the grid row by row, which matches the order of `labels`.
drawn_panels = [
    sns.lineplot(x = year_data_movies['Year'], y = year_data_movies[platform_name], color = panel_colour, ax = panel)
    for platform_name, panel_colour, panel in zip(labels, panel_colours, axes.flat)
]
n_y_ax1, h_y_ax2, p_y_ax3, d_y_ax4 = drawn_panels

# Title each panel with its platform name.
for drawn_panel, platform_name in zip(drawn_panels, labels):
    drawn_panel.title.set_text(platform_name)

plt.show()
In [63]:
def round_val(data):
    """Round ``data`` with the built-in ``round``.

    Returns ``None`` when the string form of ``data`` is ``'nan'``
    (i.e. the value is a missing number); otherwise returns ``round(data)``.
    """
    if str(data) == 'nan':
        return None
    return round(data)
        
def round_fix(data):
    """Map a release year to the upper bound of its year-group bucket.

    Buckets (inclusive):
        1801-1900 -> 1900
        1901-1910 -> 1910
        ...
        2021-2030 -> 2030

    Any value that falls in none of these buckets (years before 1801 or
    after 2030, NaN, non-integral numbers, non-numeric values) is mapped to
    the sentinel 2100.

    Parameters
    ----------
    data : int or float
        The release year. Whole-number floats such as ``1995.0`` are
        matched like the corresponding integer.

    Returns
    -------
    int
        The bucket's upper bound, or 2100 when no bucket matches.
    """
    # The first bucket spans a whole century rather than a decade.
    if data in range(1801, 1901):
        return 1900

    # Remaining buckets are plain decades ending in 1910, 1920, ..., 2030.
    # Bounds are non-overlapping: each year belongs to exactly one bucket.
    for decade_end in range(1910, 2031, 10):
        if data in range(decade_end - 9, decade_end + 1):
            return decade_end

    # Sentinel for anything outside 1801-2030 (including NaN).
    return 2100
In [64]:
# Bucket every release year into its year group (see round_fix) and store it
# as an integer column on the working frame.
df_movies_years_group['Year Group'] = (
    df_movies_years_group['Year']
    .apply(round_fix)
    .astype(int)
)

# Titles per year group, newest group first; kept as parallel values / index.
year_group_sizes_desc = (
    df_movies_years_group['Year Group']
    .value_counts()
    .sort_index(ascending=False)
)
years_values = year_group_sizes_desc.tolist()
years_index = year_group_sizes_desc.index
In [65]:
# Per year group: total title count plus the per-platform sums, merged into
# one table and ranked by total title count (largest first).
titles_by_year_group = df_movies_years_group.groupby('Year Group')

years_group_count = titles_by_year_group['Title'].count()
years_group_movies = titles_by_year_group[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()

years_group_data_movies = (
    pd.concat([years_group_count, years_group_movies], axis=1)
    .reset_index()
    .rename(columns={'Title': 'Movies Count'})
)
years_group_data_movies = years_group_data_movies.sort_values(by='Movies Count', ascending=False)
In [66]:
# All platforms combined: year groups ranked from most to fewest movies.
years_group_data_movies.sort_values('Movies Count', ascending=False)
Out[66]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
11 2020 9410 2939 795 5935 155
10 2010 3312 444 145 2650 174
9 2000 1119 131 56 872 105
7 1980 785 35 15 725 33
8 1990 746 69 30 652 32
6 1970 385 6 6 357 21
3 1940 369 0 2 363 4
4 1950 366 11 3 352 10
5 1960 366 2 7 338 23
2 1930 44 0 1 42 4
1 1920 19 0 0 19 0
0 1910 2 0 0 2 0
In [67]:
# Same summary table, ordered from the newest year group to the oldest.
years_group_data_movies.sort_values('Year Group', ascending=False)
Out[67]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
11 2020 9410 2939 795 5935 155
10 2010 3312 444 145 2650 174
9 2000 1119 131 56 872 105
8 1990 746 69 30 652 32
7 1980 785 35 15 725 33
6 1970 385 6 6 357 21
5 1960 366 2 7 338 23
4 1950 366 11 3 352 10
3 1940 369 0 2 363 4
2 1930 44 0 1 42 4
1 1920 19 0 0 19 0
0 1910 2 0 0 2 0
In [68]:
# Bar chart of total movie count per year group (all platforms combined).
# The x axis holds year groups, so the axis label and title say so.
fig = px.bar(y = years_group_data_movies['Movies Count'],
             x = years_group_data_movies['Year Group'], 
             color = years_group_data_movies['Year Group'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'Movies Count', 'x' : 'Year Group'},
             title  = 'Movies Count by Year Group : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [69]:
# Pie chart of the ten year groups with the most movies.
# years_group_data_movies is already ranked by 'Movies Count' (descending),
# so the first ten rows are the top ten groups. Columns are referenced by
# name so that names / values / color come from the same ten-row frame that
# is passed as the data source.
top_year_groups = years_group_data_movies[:10]

fig = px.pie(top_year_groups,
             names = 'Year Group',
             values = 'Movies Count',
             color = 'Movies Count',
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'Movies Count based on Year Group')
fig.show()
In [70]:
# Year groups ranked from most to fewest movies, re-indexed 0..n-1 so that
# position 0 is the group with the highest count.
df_years_group_high_movies = (
    years_group_data_movies
    .sort_values(by='Movies Count', ascending=False)
    .reset_index(drop=True)
)

df_years_group_high_movies.head(5)
Out[70]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 2020 9410 2939 795 5935 155
1 2010 3312 444 145 2650 174
2 2000 1119 131 56 872 105
3 1980 785 35 15 725 33
4 1990 746 69 30 652 32
In [71]:
# Year groups ranked from fewest to most movies, re-indexed 0..n-1 so that
# position 0 is the group with the lowest count.
df_years_group_low_movies = (
    years_group_data_movies
    .sort_values(by='Movies Count', ascending=True)
    .reset_index(drop=True)
)

df_years_group_low_movies.head(5)
Out[71]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 1910 2 0 0 2 0
1 1920 19 0 0 19 0
2 1930 44 0 1 42 4
3 1950 366 11 3 352 10
4 1960 366 2 7 338 23
In [72]:
# Gather the figures for the textual summary first, then print the report.
yg_title_total = df_movies_years['Year'].count()
yg_group_total = years_group_data_movies['Year Group'].unique().shape[0]
yg_groups_by_count = years_group_data_movies.sort_values(by = 'Movies Count', ascending = False)['Year Group'].unique()
yg_max_count = years_group_data_movies['Movies Count'].max()
yg_busiest_group = df_years_group_high_movies['Year Group'][0]
yg_min_count = years_group_data_movies['Movies Count'].min()
yg_sparsest_group = df_years_group_low_movies['Year Group'][0]

print(f'''
      Total '{yg_title_total}' Titles are available on All Platforms, out of which\n
      You Can Choose to see Movies from Total '{yg_group_total}' Year Group, They were Like this, \n
 
      {yg_groups_by_count} etc. \n
 
      The Year Group with Highest Movies Count have '{yg_max_count}' Movies Available is '{yg_busiest_group}', &\n
      The Year Group with Lowest Movies Count have '{yg_min_count}' Movies Available is '{yg_sparsest_group}'
      ''')
      Total '16923' Titles are available on All Platforms, out of which

      You Can Choose to see Movies from Total '12' Year Group, They were Like this, 

 
      [2020 2010 2000 1980 1990 1970 1940 1950 1960 1930 1920 1910] etc. 

 
      The Year Group with Highest Movies Count have '9410' Movies Available is '2020', &

      The Year Group with Lowest Movies Count have '2' Movies Available is '1910'
      
In [73]:
# Netflix-only table: year groups with at least one Netflix title,
# largest Netflix count first, keeping only 'Year Group' and 'Netflix'.
netflix_years_group_movies = (
    years_group_data_movies[years_group_data_movies['Netflix'] != 0]
    .sort_values(by='Netflix', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Hulu', 'Prime Video', 'Disney+', 'Movies Count'])
)

# Full summary table ranked by Netflix count, in both directions.
# No zero filter is applied here, so groups with no Netflix titles are included.
netflix_years_group_high_movies = (
    df_years_group_high_movies
    .sort_values(by='Netflix', ascending=False)
    .reset_index(drop=True)
)
netflix_years_group_low_movies = (
    df_years_group_high_movies
    .sort_values(by='Netflix', ascending=True)
    .reset_index(drop=True)
)

netflix_years_group_high_movies.head(5)
Out[73]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 2020 9410 2939 795 5935 155
1 2010 3312 444 145 2650 174
2 2000 1119 131 56 872 105
3 1990 746 69 30 652 32
4 1980 785 35 15 725 33
In [74]:
# Hulu-only table: year groups with at least one Hulu title,
# largest Hulu count first, keeping only 'Year Group' and 'Hulu'.
hulu_years_group_movies = (
    years_group_data_movies[years_group_data_movies['Hulu'] != 0]
    .sort_values(by='Hulu', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Prime Video', 'Disney+', 'Movies Count'])
)

# Full summary table ranked by Hulu count, in both directions.
# No zero filter is applied here, so groups with no Hulu titles are included.
hulu_years_group_high_movies = (
    df_years_group_high_movies
    .sort_values(by='Hulu', ascending=False)
    .reset_index(drop=True)
)
hulu_years_group_low_movies = (
    df_years_group_high_movies
    .sort_values(by='Hulu', ascending=True)
    .reset_index(drop=True)
)

hulu_years_group_high_movies.head(5)
Out[74]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 2020 9410 2939 795 5935 155
1 2010 3312 444 145 2650 174
2 2000 1119 131 56 872 105
3 1990 746 69 30 652 32
4 1980 785 35 15 725 33
In [75]:
# Prime Video-only table: year groups with at least one Prime Video title,
# largest Prime Video count first, keeping only 'Year Group' and 'Prime Video'.
prime_video_years_group_movies = (
    years_group_data_movies[years_group_data_movies['Prime Video'] != 0]
    .sort_values(by='Prime Video', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Hulu', 'Disney+', 'Movies Count'])
)

# Full summary table ranked by Prime Video count, in both directions.
# No zero filter is applied here, so groups with no Prime Video titles are included.
prime_video_years_group_high_movies = (
    df_years_group_high_movies
    .sort_values(by='Prime Video', ascending=False)
    .reset_index(drop=True)
)
prime_video_years_group_low_movies = (
    df_years_group_high_movies
    .sort_values(by='Prime Video', ascending=True)
    .reset_index(drop=True)
)

prime_video_years_group_high_movies.head(5)
Out[75]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 2020 9410 2939 795 5935 155
1 2010 3312 444 145 2650 174
2 2000 1119 131 56 872 105
3 1980 785 35 15 725 33
4 1990 746 69 30 652 32
In [76]:
# Disney+-only table: year groups with at least one Disney+ title,
# largest Disney+ count first, keeping only 'Year Group' and 'Disney+'.
disney_years_group_movies = (
    years_group_data_movies[years_group_data_movies['Disney+'] != 0]
    .sort_values(by='Disney+', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['Netflix', 'Hulu', 'Prime Video', 'Movies Count'])
)

# Full summary table ranked by Disney+ count, in both directions.
# No zero filter is applied here, so groups with no Disney+ titles are included.
disney_years_group_high_movies = (
    df_years_group_high_movies
    .sort_values(by='Disney+', ascending=False)
    .reset_index(drop=True)
)
disney_years_group_low_movies = (
    df_years_group_high_movies
    .sort_values(by='Disney+', ascending=True)
    .reset_index(drop=True)
)

disney_years_group_high_movies.head(5)
Out[76]:
Year Group Movies Count Netflix Hulu Prime Video Disney+
0 2010 3312 444 145 2650 174
1 2020 9410 2939 795 5935 155
2 2000 1119 131 56 872 105
3 1980 785 35 15 725 33
4 1990 746 69 30 652 32
In [77]:
# Each pair is (year group in first row of the ranked table, extreme count).
yg_all_high = (df_years_group_high_movies['Year Group'][0], df_years_group_high_movies['Movies Count'].max())
yg_all_low = (df_years_group_low_movies['Year Group'][0], df_years_group_low_movies['Movies Count'].min())

yg_netflix_high = (netflix_years_group_high_movies['Year Group'][0], netflix_years_group_high_movies['Netflix'].max())
yg_netflix_low = (netflix_years_group_low_movies['Year Group'][0], netflix_years_group_low_movies['Netflix'].min())

yg_hulu_high = (hulu_years_group_high_movies['Year Group'][0], hulu_years_group_high_movies['Hulu'].max())
yg_hulu_low = (hulu_years_group_low_movies['Year Group'][0], hulu_years_group_low_movies['Hulu'].min())

yg_prime_high = (prime_video_years_group_high_movies['Year Group'][0], prime_video_years_group_high_movies['Prime Video'].max())
yg_prime_low = (prime_video_years_group_low_movies['Year Group'][0], prime_video_years_group_low_movies['Prime Video'].min())

yg_disney_high = (disney_years_group_high_movies['Year Group'][0], disney_years_group_high_movies['Disney+'].max())
yg_disney_low = (disney_years_group_low_movies['Year Group'][0], disney_years_group_low_movies['Disney+'].min())

print(f'''
      The Year Group with Highest Movies Count Ever Got is '{yg_all_high[0]}' : '{yg_all_high[1]}'\n
      The Year Group with Lowest Movies Count Ever Got is '{yg_all_low[0]}' : '{yg_all_low[1]}'\n
      
      The Year Group with Highest Movies Count on 'Netflix' is '{yg_netflix_high[0]}' : '{yg_netflix_high[1]}'\n
      The Year Group with Lowest Movies Count on 'Netflix' is '{yg_netflix_low[0]}' : '{yg_netflix_low[1]}'\n
      
      The Year Group with Highest Movies Count on 'Hulu' is '{yg_hulu_high[0]}' : '{yg_hulu_high[1]}'\n
      The Year Group with Lowest Movies Count on 'Hulu' is '{yg_hulu_low[0]}' : '{yg_hulu_low[1]}'\n
      
      The Year Group with Highest Movies Count on 'Prime Video' is '{yg_prime_high[0]}' : '{yg_prime_high[1]}'\n
      The Year Group with Lowest Movies Count on 'Prime Video' is '{yg_prime_low[0]}' : '{yg_prime_low[1]}'\n
      
      The Year Group with Highest Movies Count on 'Disney+' is '{yg_disney_high[0]}' : '{yg_disney_high[1]}'\n
      The Year Group with Lowest Movies Count on 'Disney+' is '{yg_disney_low[0]}' : '{yg_disney_low[1]}'\n 
      ''')
      The Year Group with Highest Movies Count Ever Got is '2020' : '9410'

      The Year Group with Lowest Movies Count Ever Got is '1910' : '2'

      
      The Year Group with Highest Movies Count on 'Netflix' is '2020' : '2939'

      The Year Group with Lowest Movies Count on 'Netflix' is '1940' : '0'

      
      The Year Group with Highest Movies Count on 'Hulu' is '2020' : '795'

      The Year Group with Lowest Movies Count on 'Hulu' is '1920' : '0'

      
      The Year Group with Highest Movies Count on 'Prime Video' is '2020' : '5935'

      The Year Group with Lowest Movies Count on 'Prime Video' is '1910' : '2'

      
      The Year Group with Highest Movies Count on 'Disney+' is '2010' : '174'

      The Year Group with Lowest Movies Count on 'Disney+' is '1920' : '0'
 
      
In [78]:
# One bar chart per platform (2 x 2 grid), each drawn from that platform's
# own table of year groups with a non-zero count.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
per_platform_tables = [
    netflix_years_group_movies,
    hulu_years_group_movies,
    prime_video_years_group_movies,
    disney_years_group_movies,
]
per_platform_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# Panels are filled row by row, in the same order as `labels`.
n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.barplot(x=table['Year Group'], y=table[column], palette=scheme, ax=target)
    for table, column, scheme, target in zip(per_platform_tables, labels, per_platform_palettes, axes.flat)
]

for bar_panel, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    bar_panel.title.set_text(panel_title)

plt.show()
In [79]:
# Movie counts per year group for each platform, overlaid on one set of axes.
# Each line is labelled so the legend tells the four colours apart.
plt.figure(figsize = (20, 5))
for platform_name, line_color in [('Netflix', 'red'),
                                  ('Hulu', 'lightgreen'),
                                  ('Prime Video', 'lightblue'),
                                  ('Disney+', 'darkblue')]:
    sns.lineplot(x = years_group_data_movies['Year Group'],
                 y = years_group_data_movies[platform_name],
                 color = line_color,
                 label = platform_name)
plt.xlabel('Year Group', fontsize = 15)
plt.ylabel('Movies Count', fontsize = 15)
plt.legend(title = 'Platform')
plt.show()
In [80]:
# Number of distinct year groups overall and per platform (the per-platform
# tables only contain groups with a non-zero count for that platform).
yg_count_all = len(years_group_data_movies['Year Group'].unique())
yg_count_netflix = len(netflix_years_group_movies['Year Group'].unique())
yg_count_hulu = len(hulu_years_group_movies['Year Group'].unique())
yg_count_prime = len(prime_video_years_group_movies['Year Group'].unique())
yg_count_disney = len(disney_years_group_movies['Year Group'].unique())

print(f'''
      Accross All Platforms Total Count of Year Group is '{yg_count_all}'\n
      Total Count of Year Group on 'Netflix' is '{yg_count_netflix}'\n
      Total Count of Year Group on 'Hulu' is '{yg_count_hulu}'\n
      Total Count of Year Group on 'Prime Video' is '{yg_count_prime}'\n
      Total Count of Year Group on 'Disney+' is '{yg_count_disney}'\n 
      ''')
      Accross All Platforms Total Count of Year Group is '12'

      Total Count of Year Group on 'Netflix' is '8'

      Total Count of Year Group on 'Hulu' is '10'

      Total Count of Year Group on 'Prime Video' is '12'

      Total Count of Year Group on 'Disney+' is '10'
 
      
In [81]:
# 2 x 2 grid of line plots with the platform count on the x axis and the
# year group on the y axis.
# NOTE(review): x and y are swapped relative to the other year-group line
# plots in this notebook — confirm this orientation is intended.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
swapped_line_colors = ['red', 'lightgreen', 'lightblue', 'darkblue']

# Panels are filled row by row, in the same order as `labels`.
n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.lineplot(y=years_group_data_movies['Year Group'], x=years_group_data_movies[column], color=shade, ax=target)
    for column, shade, target in zip(labels, swapped_line_colors, axes.flat)
]

for swapped_panel, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    swapped_panel.title.set_text(panel_title)

plt.show()
In [82]:
# One panel per platform (2 x 2 grid): movie count per year group.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
year_group_line_colors = ['red', 'lightgreen', 'lightblue', 'darkblue']

# Panels are filled row by row, in the same order as `labels`.
n_yg_ax1, h_yg_ax2, p_yg_ax3, d_yg_ax4 = [
    sns.lineplot(x=years_group_data_movies['Year Group'], y=years_group_data_movies[column], color=shade, ax=target)
    for column, shade, target in zip(labels, year_group_line_colors, axes.flat)
]

for year_group_panel, panel_title in zip((n_yg_ax1, h_yg_ax2, p_yg_ax3, d_yg_ax4), labels):
    year_group_panel.title.set_text(panel_title)

plt.show()
In [83]:
# One bar chart per platform (2 x 2 grid), all drawn from the combined
# year-group table, so year groups with a zero count are included.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
combined_bar_palettes = ['Reds_r', 'Greens_r', 'Blues_r', 'BuPu_r']

# Panels are filled row by row, in the same order as `labels`.
n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4 = [
    sns.barplot(x=years_group_data_movies['Year Group'], y=years_group_data_movies[column], palette=scheme, ax=target)
    for column, scheme, target in zip(labels, combined_bar_palettes, axes.flat)
]

for combined_panel, panel_title in zip((n_ru_ax1, h_ru_ax2, p_ru_ax3, d_ru_ax4), labels):
    combined_panel.title.set_text(panel_title)

plt.show()